# coding:utf8
from bs4 import BeautifulSoup
import re
class HtmlParser(object):
    """Extract follow-up links and table cell text from a fetched HTML page."""

    # Prefix prepended verbatim to every matched href.
    # NOTE(review): if the hrefs are root-relative ("/uoj/...") this yields a
    # double slash after the port; left unchanged so emitted URLs stay
    # identical to what callers already receive.
    _URL_PREFIX = "http://acm.scau.edu.cn:8000/"

    # Only anchors whose href contains this path are followed.
    _LINK_PATTERN = re.compile(r'/uoj/register/displayActivity\.html')

    # Every position that is a multiple of this number is emitted as "".
    # Presumably the table has six columns per row -- confirm against the page.
    _CELLS_PER_GROUP = 6

    def parser(self, new_url, html_cont):
        """Parse one page and return the links and cell texts found in it.

        Args:
            new_url: URL of the page being parsed. Currently unused; kept so
                the call signature stays compatible with existing callers.
            html_cont: Raw HTML of the page, handed to BeautifulSoup.

        Returns:
            A ``(linklist, contentlist)`` tuple. ``linklist`` holds the URL
            prefix joined with each matching href. ``contentlist`` holds the
            text of the ``<td>`` cells of the first ``table.table`` element:
            cell 0 is skipped, and every position that is a multiple of six
            is replaced by an empty string. ``contentlist`` is empty when the
            page has no such table.
        """
        soup = BeautifulSoup(html_cont, 'html.parser', from_encoding='utf-8')
        return self._collect_links(soup), self._collect_cell_texts(soup)

    def _collect_links(self, soup):
        """Return the prefixed href of every anchor matching the link pattern."""
        anchors = soup.find_all("a", href=self._LINK_PATTERN)
        return [self._URL_PREFIX + anchor['href'] for anchor in anchors]

    def _collect_cell_texts(self, soup):
        """Return the flattened cell texts of the first ``table.table``."""
        table = soup.find("table", class_="table")
        if table is None:
            # No data table on this page: report no content instead of
            # raising, so the links collected from the page are still usable.
            return []

        cells = table.find_all('td')
        total = len(cells)
        texts = []
        # Positions run from 1 to total inclusive, so cell 0 is never read.
        for position in range(1, total + 1):
            if position % self._CELLS_PER_GROUP == 0:
                texts.append("")
            elif position < total:
                # The bound check prevents indexing one past the last cell
                # when the cell count is not a multiple of the group size.
                texts.append(cells[position].get_text())
        return texts
